clear all
clear matrix
clear mata

set more off

local path = "...folder"

cd "`path'"

//This program calculates standard errors by bootstrap clustered by vi facility

set seed 12345

local reps = 1
local nreps = 1000


///Start replication 1 and put results in excel files 

forvalues `reps' = 1(1)1{

********************************
*step 1: draw bootstrap samples*
********************************

//step 1.1: draw bootstrap samples of steel based on facility IDs. 

use steel_plant_pf.dta, clear

/*generate log variables*/
gen y = log(production)
gen l = log(labor)
gen k = log(size)
gen logiron = log(tpigiron/1000)
gen logscrap = log(tscrap/1000) 
gen age = year - birth
gen m = logiron
gen p = logscrap /*use scrap steel as the proxy variable*/

gen dsecond = 0
replace dsecond = 1 if second > 0 & second < = 100

*trim outliers
drop if labor < 1 | labor > 500000

*merge with steel linkage
merge m:1 plant using steel_link.dta,keep(match)
drop _merge

gen firm_iron_steel = firm + steel_link /*create vi id*/

bsample, cluster(firm_iron_steel) idcluster(vi)

bysort plant time: gen identifier = _n /*identify duplicated equipment by bootstrap*/
bysort plant time: gen nb_dup = _N /*identify the number of duplicates used to expand upstream sample*/
gen dup_str = string(identifier)
gen plant_dup = plant + dup_str /*generate new ids for bootstrap sample*/
encode plant_dup, gen(newid)
sort newid time
xtset newid time

save steel_bootstrap.dta, replace 

//step 1.2: pick a sample of blast furnaces matched with steel_link

use steel_bootstrap.dta, clear
bysort firm_iron_steel: gen drop = _n
keep if drop == 1 
keep firm_iron_steel nb_dup  /*collect vi ids that appear in the bootstrap sample; keep the number of duplicates*/
save bootstrap_link_iron_steel.dta, replace

use iron_plant_pf.dta, clear

merge m:1 plant using iron_link.dta /*Add facility identifier*/
drop _merge

gen firm_iron_steel = firm + iron_steel_link /*create vi ids*/

merge m:1 firm_iron_steel using bootstrap_link_iron_steel.dta, keep(match)/*keep the Vis that appear in the bootstrap steel sample*/
drop _merge

drop if labor < 1
/*generate log variables*/
gen y = log(production)
gen l = log(labor)
gen k = log(size)
gen logpure = log((tore/1000)*(quality/100))
gen logenergy = log(tenergy/1000)
gen age = year - birth
gen m = logpure
gen p = logenergy

expand nb_dup /*expand samples to match with duplicated VIs in the bootstrap sample*/
drop nb_dup

bysort plant time: gen identifier = _n
bysort plant time: gen nb_dup = _N 
gen dup_str = string(identifier)
gen plant_dup = plant + dup_str
encode plant_dup, gen(newid)
sort newid time
xtset newid time

save iron_bootstrap.dta, replace 

//step 1.3: *step 1.2: pick a sample of blast furnaces matched with steel_link

use iron_bootstrap.dta, clear
gen firm_sinter_iron = firm + sinter_iron_link
bysort firm_sinter_iron: egen max_dup = max(nb_dup)/*for structure where a set of sintering machines may map into multiple facilities, and only part of the matched facilities get expanded, we still expand the sintering machines*/
bysort firm_sinter_iron: gen drop = _n
keep if drop == 1
keep firm_sinter_iron max_dup
save bootstrap_link_sinter_iron.dta, replace

use sinter_plant_pf.dta, clear

merge m:1 plant using sinter_link.dta /*Add facility identifier*/
drop _merge

gen firm_sinter_iron = firm + sinter_link
merge m:1 firm_sinter_iron using bootstrap_link_sinter_iron.dta, keep(match)/*match with iron sample id*/
drop _merge

/*generate log variables*/
gen y = log(production*grade/100) 
gen l = log(labor)
gen k = log(size)
gen age = year - birth
gen logenergy = log(tenergy/1000)
gen p = logenergy /*proxy variable*/

/*drop outlier*/
gen p1 = log(energy)
drop if p1 < 0 | p1 > 6.9 
drop if labor > 10000 | labor < 1 
drop if grade > 100 
drop if firm == "首钢" & (size == 65 | size == 200)

expand max_dup /*expand sintering sample to match with iron sample*/
drop max_dup

bysort plant time: gen identifier = _n
bysort plant time: gen nb_dup = _N 
gen dup_str = string(identifier)
gen plant_dup = plant + dup_str
encode plant_dup, gen(newid)
sort newid time
xtset newid time

save sinter_bootstrap.dta, replace 

*******************************
*step 2: estimate steel making*
*******************************
local dlwcprogram = "acf_mata_seel.do"

do `dlwcprogram'

use steel_bootstrap.dta, clear

*---------------creating variables---------------------------------------------*
* higher order terms on inputs k l a p

forvalues ll = 0(1)5{
         local kmax = 5 - `ll'
		 forvalues kk = 0(1)`kmax'{
		      local amax = 5 - `ll' - `kk'
			  forvalues aa = 0(1)`amax'{
			       local mmax = 5 - `ll' - `kk' - `aa'
				   forvalues mm = 0(1)`mmax'{
				    local pmax = 5 - `ll' - `kk' - `mm' - `aa'
					forvalues pp = 0(1)`pmax'{
				    gen poly`ll'`kk'`mm'`aa'`pp' = (l^`ll')*(k^`kk')*(m^`mm')*(age^`aa')*(p^`pp')
				     }
				   }
			  }
		 }
}


*------FIRST STAGE USING EXP AS INPUT  ----------------------------------------*
local regphi = "poly* dsecond second i.t i.owner i.region"

xi: reg y `regphi'
predict phi
predict epsilon, res
label var phi "phi_it 
label var epsilon "measurement error first stage

*------------------------------------------------------------------------------*
xtset newid time, monthly
gen phi_lag=L.phi
gen l_lag=L.l
gen k_lag=L.k
gen age_lag = L.age
gen m_lag = L.m
gen dsecond_lag = L.dsecond
gen second_lag = L.second

sort newid t
gen const=1
drop if y==.
drop if l_lag==.
drop if l == .
drop if k==.
drop if m ==.
drop if m_lag == .
drop if phi==.
drop if phi_lag==.
drop if age ==.

*-------------------------------start search-----------------------------------*
**use the gmm estimates as initial values 
gen initiala = -0.000564216
gen initialk = 0.129031925
gen initialz = sqrt(initialk/(1-initialk))
gen initiall = 0.052450862
gen initialx = sqrt(initiall/(1-initiall))
gen initialm = 0.877800415

dlwc

matrix result = (beta_dlw[1,1]^2/(1+beta_dlw[1,1]^2),beta_dlw[1,2]^2/(1+beta_dlw[1,2]^2),beta_dlw[1,3],beta_dlw[1,4])


putexcel A`reps' = mat(result) using acf_steel_bootstrap, sheet("iron_scrap") modify

matrix drop result

*--------------------------------steel making analysis-------------------------*
import excel "...folder name\acf_steel_bootstrap.xlsx", sheet("iron_scrap") clear
gen obs = _n
keep if obs == `reps'
rename A beta_l
rename B beta_k
rename C beta_m
rename D beta_a
save beta_steel_bootstrap.dta, replace

/*generate material elasticity*/
use beta_steel_bootstrap.dta, clear
keep beta_m 
rename beta_m gamma3

save gamma3.dta, replace 

/*generate steel TFP*/
use steel_bootstrap.dta, clear

gen beta_l = .
gen beta_k = .
gen beta_m = .
gen beta_a = .

append using beta_steel_bootstrap.dta

replace beta_l = beta_l[_N]
replace beta_k = beta_k[_N]
replace beta_m = beta_m[_N]
replace beta_a = beta_a[_N]
drop if _n == _N

/*generate tfp estimates*/
gen tfp = y - beta_l*l - beta_k*k - beta_m*m - beta_a*age

*normalized tfp 
egen mean_tfp = mean(tfp)
replace tfp = exp(tfp - mean_tfp)
gen w_det_steel = exp(beta_l*l + beta_k*k + beta_m*m + beta_a*age)

keep newid plant_dup firm time year tfp w_det_steel 
rename tfp tfp_steel

gen t = string(time)
gen fid = plant_dup + t
drop t 

save steel_tfp_lmk_link_boot.dta, replace 

/*generate controls for upstream*/
use steel_bootstrap.dta, clear

gen beta_l = .
gen beta_k = .
gen beta_m = .
gen beta_a = .

append using beta_steel_bootstrap.dta

replace beta_l = beta_l[_N]
replace beta_k = beta_k[_N]
replace beta_m = beta_m[_N]
replace beta_a = beta_a[_N]
drop if _n == _N

/*generate tfp estimates*/
gen tfp = y - beta_l*l - beta_k*k - beta_m*m - beta_a*age

*normalized tfp 
egen mean_tfp = mean(tfp)
replace tfp = tfp - mean_tfp
gen w_det_steel = exp(beta_l*l + beta_k*k + beta_m*m + beta_a*age)

drop t
gen t = string(time)

gen fid_iron_steel = firm + t + steel_link /*stack information at firm_linkage_time level, that is at the production chain level; to make sure that every firm every link of each period only has one observation*/

keep if identifier == 1

/*create weighted information*/
bysort fid_iron_steel: egen det_steel = sum(w_det_steel)
bysort fid_iron_steel: gen nb_steel = _N 

drop if tfp == . | k == . | l == .

replace tfp = tfp*w_det_steel/det_steel
bysort fid_iron_steel: egen tfp_steel = sum(tfp) /*logtfp weighted by deterministic components*/
replace k = k*w_det_steel/det_steel
bysort fid_iron_steel: egen k_steel = sum(k)
replace l = l*w_det_steel/det_steel
bysort fid_iron_steel: egen l_steel = sum(l)

bysort fid_iron_steel: gen structure = _n /*keep one observation for one chain at each time t*/
keep if structure == 1 

keep fid_iron_steel tfp_steel l_steel k_steel nb_steel 

save steel_tfp_lmk_iron_bootstrap.dta, replace

/*TFP analysis: steel making*/
use steel_bootstrap.dta, clear

keep newid plant_dup firm time production number size owner region second labor 

gen t = string(time)
gen fid = plant_dup + t
drop t 

merge 1:1 fid using steel_tfp_lmk_link_boot.dta
drop _merge

gen logtfp = log(tfp_steel)

drop if logtfp > 2 | logtfp <= -2 

gen logsize = log(size)
encode owner, gen(ownership)

/*generate standard errors for the estimates in Table 5 Column (7) and (8)*/
reg logtfp i.ownership i.time
outreg2 using steel_owner_boot, keep(i.ownership) noaster excel replace  

reg logtfp i.ownership logsize i.time
outreg2 using steel_owner_size_boot, keep(i.ownership logsize) noaster excel replace  

/*generate standard errors for the estimates in Table 8 Column (7) and (8)*/
replace second = 100 if second > 100

reg logtfp i.ownership second i.time
outreg2 using steel_quality_boot, keep(i.ownership second) noaster excel replace 

reg logtfp i.ownership logsize second i.time
outreg2 using steel_quality_size_boot, keep(i.ownership logsize second) noaster excel replace

**********************************
*step 3: estimate pig iron making*
**********************************
local dlwcprogram = "acf_mata_iron.do"

do `dlwcprogram'

use iron_bootstrap.dta, clear

gen tid = string(time)
gen fid_iron_steel = firm + tid + iron_steel_link

*merge with downstream steel information 
merge m:1 fid_iron_steel using steel_tfp_lmk_iron_bootstrap.dta 

*---------------creating variables---------------------------------------------*
* higher order terms on inputs k l a p

forvalues ll = 0(1)5{
         local kmax = 5 - `ll'
		 forvalues kk = 0(1)`kmax'{
		      local amax = 5 - `ll' - `kk'
			  forvalues aa = 0(1)`amax'{
			       local mmax = 5 - `ll' - `kk' - `aa'
				   forvalues mm = 0(1)`mmax'{
				    local pmax = 5 - `ll' - `kk' - `mm' - `aa'
					forvalues pp = 0(1)`pmax'{
				    gen poly`ll'`kk'`mm'`aa'`pp' = (l^`ll')*(k^`kk')*(m^`mm')*(age^`aa')*(p^`pp')
				     }
				   }
			  }
		 }
}

*------FIRST STAGE USING EXP AS INPUT  ----------------------------------------*
local regphi = "poly* i.t i.owner i.region"

xi: reg y `regphi' tfp_steel l_steel k_steel nb_steel 
predict phi
predict epsilon, res
label var phi "phi_it 
label var epsilon "measurement error first stage

*------------------------------------------------------------------------------*/
xtset newid time, monthly
gen phi_lag=L.phi
gen l_lag=L.l
gen k_lag=L.k
gen age_lag = L.age
gen m_lag = L.m

sort newid t
gen const=1
drop if y==.
drop if l_lag==.
drop if l == .
drop if k==.
drop if m ==.
drop if m_lag == .
drop if phi==.
drop if phi_lag==.
drop if age ==.

*-------------------------------start search-----------------------------------*
**use the gmm estimates as initial values
gen initiala = -0.000288024
gen initialk = 0.448629832
gen initialz = sqrt(initialk/(1-initialk))
gen initiall = 0.182428734
gen initialx = sqrt(initiall/(1-initiall))
gen initialm = 0.436671586

dlwc

matrix result = (beta_dlw[1,1]^2/(1+beta_dlw[1,1]^2),beta_dlw[1,2]^2/(1+beta_dlw[1,2]^2),beta_dlw[1,3],beta_dlw[1,4])


putexcel A`reps' = mat(result) using acf_iron_bootstrap, sheet("energy") modify

matrix drop result

*---------------------pig-iron making analysis---------------------------------*
import excel "...folder name\acf_iron_bootstrap.xlsx", sheet("energy") clear
gen obs = _n
keep if obs == `reps'
rename A beta_l
rename B beta_k
rename C beta_m
rename D beta_a
save beta_iron_bootstrap.dta, replace

/*generate material elasticity*/
use beta_iron_bootstrap.dta, clear
keep beta_m 
rename beta_m gamma2

save gamma2.dta, replace 

/*generate TFP estimates*/
use iron_bootstrap.dta, clear

gen beta_l = .
gen beta_k = .
gen beta_m = .
gen beta_a = .

append using beta_iron_bootstrap.dta

replace beta_l = beta_l[_N]
replace beta_k = beta_k[_N]
replace beta_m = beta_m[_N]
replace beta_a = beta_a[_N]
drop if _n == _N

/*generate tfp estimates*/
gen tfp = y - beta_l*l - beta_k*k - beta_m*m - beta_a*age
drop if labor < 1
egen mean_tfp = mean(tfp)
replace tfp = exp(tfp - mean_tfp)

/*generate deterministic part*/
gen w_det_iron = exp(beta_l*l + beta_k*k + beta_m*m + beta_a*age)

keep newid plant_dup firm time tfp w_det_iron
rename tfp tfp_iron

gen t = string(time)
gen fid = plant_dup + t
drop t

save iron_tfp_lmk_link_boot.dta, replace

/*generate controls for upstream stage sintering using linkage link_sinter_iron*/
use iron_bootstrap.dta, clear

gen tid = string(time)
gen fid_iron_steel = firm + tid + iron_steel_link

**merge with downstream steel information 
merge m:1 fid_iron_steel using steel_tfp_lmk_iron_bootstrap.dta 
drop _merge

gen beta_l = .
gen beta_k = .
gen beta_m = .
gen beta_a = .

append using beta_iron_bootstrap.dta

replace beta_l = beta_l[_N]
replace beta_k = beta_k[_N]
replace beta_m = beta_m[_N]
replace beta_a = beta_a[_N]
drop if _n == _N

/*generate tfp estimates*/
gen tfp = y - beta_l*l - beta_k*k - beta_m*m - beta_a*age
drop if labor < 1
egen mean_tfp = mean(tfp)
replace tfp = tfp - mean_tfp/*keep logtfp here*/

/*generate deterministic part*/
gen w_det_iron = exp(beta_l*l + beta_k*k + beta_m*m + beta_a*age)

//generate controls for sintering 
gen fid_sinter_iron = firm + tid + sinter_iron_link

keep if identifier == 1 /*drop duplicated information otherwise would overcount*/
/*create weighted information*/
bysort fid_sinter_iron: egen det_iron = sum(w_det_iron)
bysort fid_sinter_iron: gen nb_iron = _N

drop if tfp == . | k == . | l == . | tfp_steel == . | k_steel == . | l_steel == . | nb_steel == . 

foreach var of varlist tfp k l tfp_steel k_steel l_steel{
replace `var' =`var'*w_det_iron/det_iron
bysort fid_sinter_iron: egen `var'_iron = sum(`var') /*logtfp weighted by deterministic components*/
}

bysort fid_sinter_iron: egen nb_steel_iron = sum(nb_steel) /*number of equipment don't need weight, simply the total number to reflect the structure*/

bysort fid_sinter_iron: gen structure = _n
keep if structure == 1 

keep fid_sinter_iron tfp_iron l_iron k_iron tfp_steel_iron l_steel_iron k_steel_iron nb_iron nb_steel_iron 

save iron_tfp_lmk_sinter_bootstrap.dta, replace

///TFP analysis: iron making 
use iron_bootstrap.dta, clear

keep newid plant_dup firm time year month production number size owner region labor grade1 

gen t = string(time)
gen fid = plant + t
drop t 

merge 1:1 fid using iron_tfp_lmk_link_boot.dta
drop _merge

gen logtfp = log(tfp_iron)

drop if logtfp > 4 | logtfp < -4 

gen logsize = log(size)
encode owner, gen(ownership)

/*generate standard errors for the estimates in Table 5 Column (5) and (6)*/
reg logtfp i.ownership i.time
outreg2 using iron_owner_boot, keep(i.ownership) noaster excel replace  

reg logtfp i.ownership logsize i.time
outreg2 using iron_owner_size_boot, keep(i.ownership logsize) noaster excel replace  

/*generate estimates in Table 8 Column (5) and (6)*/
replace grade1 = 100 if grade1 > 100

reg logtfp i.ownership grade1 i.time
outreg2 using iron_quality_boot, keep(i.ownership grade1) noaster excel replace

reg logtfp i.ownership logsize grade1 i.time
outreg2 using iron_quality_size_boot, keep(i.ownership logsize grade1) noaster excel replace

****************************
*step 4: estimate sintering* 
****************************

local dlwcprogram = "acf_mata_sinter.do"

do `dlwcprogram'

use sinter_bootstrap.dta, clear

gen tid = string(time)
gen fid_sinter_iron = firm + tid + sinter_link

**merge with downstream steel information 
merge m:1 fid_sinter_iron using iron_tfp_lmk_sinter_bootstrap.dta 

*---------------creating variables---------------------------------------------*
* higher order terms on inputs k l a p

forvalues ll = 0(1)4{
         local kmax = 4 - `ll'
		 forvalues kk = 0(1)`kmax'{
		      local amax = 4 - `ll' - `kk'
			  forvalues aa = 0(1)`amax'{
			       local pmax = 4 - `ll' - `kk' - `aa'
				   forvalues pp = 0(1)`pmax'{
				   gen poly`ll'`kk'`aa'`pp' = (l^`ll')*(k^`kk')*(age^`aa')*(p^`pp')
				   }
			  }
		 }
}


*------FIRST STAGE USING EXP AS INPUT  ----------------------------------------*
local regphi = "poly* i.t i.owner i.region"

xi: reg y `regphi' tfp_steel_iron tfp_iron l_steel_iron l_iron k_steel_iron k_iron nb_iron nb_steel_iron 
predict phi
predict epsilon, res
label var phi "phi_it 
label var epsilon "measurement error first stage

*------------------------------------------------------------------------------*
xtset newid time, monthly
gen phi_lag=L.phi
gen l_lag=L.l
gen k_lag=L.k
gen age_lag = L.age

sort newid t
gen const=1
drop if y==.
drop if l_lag==.
drop if l == .
drop if k==.

drop if phi==.
drop if phi_lag==.
drop if age ==.

*-------------------------------start search-----------------------------------*
**use the gmm estimates as initial values
gen initiala = -0.000515514
gen initialk = 0.877699269
gen initialz = sqrt(initialk/(1-initialk))
gen initiall = 0.187965831
gen initialx = sqrt(initiall/(1-initiall))

dlwc

matrix result = (beta_dlw[1,1]^2/(1+beta_dlw[1,1]^2),beta_dlw[1,2]^2/(1+beta_dlw[1,2]^2),beta_dlw[1,3])


putexcel A`reps' = mat(result) using acf_sinter_bootstrap, sheet("y2_energy") modify

matrix drop result

/*********************************sintering analysis****************************/
import excel "...folder name\acf_sinter_link.xlsx", sheet("y2_energy") clear
sort D
keep if _n == 1
rename A beta_l
rename B beta_k
rename C beta_a
drop D
save beta_sinter_bootstrap.dta, replace

/*generate tfp estimates*/
use sinter_bootstrap.dta, clear

gen beta_l = .
gen beta_k = .
gen beta_a = .

append using beta_sinter_bootstrap.dta

replace beta_l = beta_l[_N]
replace beta_k = beta_k[_N]
replace beta_a = beta_a[_N]
drop if _n == _N

/*generate tfp estimates*/
gen tfp = y - beta_l*l - beta_k*k - beta_a*age

egen mean_tfp = mean(tfp)
replace tfp = exp(tfp - mean_tfp)

/*generate deterministic part*/
gen w_det_sinter = exp(beta_l*l + beta_k*k + beta_a*age)

keep plant_dup firm time tfp w_det_sinter
rename tfp tfp_sinter

gen t = string(time)
gen fid = plant_dup + t
drop t

save sinter_tfp_lmk_link_boot.dta, replace

/*Start ownership analysis*/
use sinter_bootstrap.dta, clear

gen t = string(time)
gen fid = plant_dup + t
drop t 

merge 1:1 fid using sinter_tfp_lmk_link_boot.dta
drop _merge

gen logtfp = log(tfp_sinter)

drop if logtfp > 5 

gen logsize = log(size)
encode owner, gen(ownership)

/*generate standard error for the estimates in Table 5 Column (3) and (4) */ 
reg logtfp i.ownership i.time
outreg2 using sinter_owner_boot, keep(i.ownership) noaster excel replace  

reg logtfp i.ownership logsize i.time
outreg2 using sinter_owner_size_boot, keep(i.ownership logsize) noaster excel replace  

/*generate standard errors for the estimates in Table 8 Column (3) and (4)*/
replace grade_stability = 100 if grade_stability>100

reg logtfp i.ownership grade_stability i.time 
outreg2 using sinter_quality_boot, keep(i.ownership grade_stability) noaster excel replace 

reg logtfp i.ownership logsize grade_stability i.time
outreg2 using sinter_quality_size_boot, keep(i.ownership logsize grade_stability) noaster excel replace 


*********************************
*Step 4: Integrate aggregate TFP*
*********************************
/*Merge stage-level dataset with tfp estimates for sintering*/

use sinter_bootstrap.dta, clear

keep firm plant_dup time year month production size owner region labor grade_stability firm_sinter_iron_dup

drop if production == .

gen tid = string(time)
gen fid = plant_dup + tid

merge 1:1 fid using sinter_tfp_lmk_link_boot.dta
drop _merge

*trim outliers
gen logtfp = log(tfp_sinter)
drop if logtfp > 5 

*adjust outliers of quality measure 
replace grade_stability = 0 if grade_stability < 0
replace grade_stability = 100 if grade_stability > 100

gen fid_sinter_iron = firm_sinter_iron_dup + tid

/*aggregation across plants within stage according to the facility identifier*/
*create weights
bysort fid_sinter_iron: egen det_sinter = sum(w_det_sinter)
bysort fid_sinter_iron: egen pdt_sinter = sum(production)
bysort fid_sinter_iron: gen nb_sinter = _N

gen tfp1_sinter = log(tfp_sinter) 

replace tfp1_sinter = tfp1_sinter*w_det_sinter/det_sinter 
bysort fid_sinter_iron: egen tfp1 = sum(tfp1_sinter) /*tfp by weight 1 using deterministic component*/

drop tfp1_sinter 
rename tfp1 tfp1_sinter

*create weighted quality
replace grade_stability = grade_stability*production/pdt_sinter
bysort fid_sinter_iron: egen sinter_quality1 = sum(grade_stability)

bysort fid_sinter_iron: gen n = _n
keep if n == 1
drop n 

keep firm_sinter_iron_dup fid_sinter_iron det_sinter pdt_sinter tfp1_sinter owner region sinter_quality1 nb_sinter
rename region region_sinter

save sinter_tfp_lmk_link_aggregate_boot.dta, replace

/*Merge stage-level dataset with tfp estimates for pig-iron making*/
use iron_bootstrap.dta, clear

keep firm plant_dup time year month production size owner region grade1 firm_sinter_iron_dup firm_iron_steel_dup

drop if production == .

gen tid = string(time)
gen fid = plant_dup + tid

merge 1:1 fid using iron_tfp_lmk_link_boot.dta
drop _merge

*trim outliers
gen logtfp = log(tfp_iron)
drop if logtfp > 4 | logtfp < -4 

*iron quality outlier 
replace grade1 = 0 if grade1 < 0
replace grade1 = 100 if grade1 > 100

gen fid_sinter_iron = firm_sinter_iron + tid
gen fid_iron_steel = firm_iron_steel + tid 

merge m:1 fid_sinter_iron using sinter_tfp_lmk_link_aggregate_boot.dta
drop _merge

/*aggregation across plants within stage according to the facility identifier*/
**create weights
bysort fid_iron_steel: egen det_iron = sum(w_det_iron)
bysort fid_iron_steel: egen pdt_iron = sum(production)
bysort fid_iron_steel: gen nb_iron = _N

gen tfp1_iron = log(tfp_iron) 
replace tfp1_iron = tfp1_iron*w_det_iron/det_iron 
bysort fid_iron_steel: egen tfp1 = sum(tfp1_iron) /*tfp by weight 1 using deterministic component*/

drop tfp1_iron
rename tfp1 tfp1_iron

*weighted quality
replace grade1 = grade1*production/pdt_iron
bysort fid_iron_steel: egen iron_quality1 = sum(grade1)

*Ajust the weight for sintering plants according to according to fid_iron_steel
bysort fid_iron_steel fid_sinter_iron: gen n = _N
replace det_sinter = det_sinter/n
replace pdt_sinter = pdt_sinter/n
replace size_sinter = size_sinter/n
replace nb_sinter = nb_sinter/n
drop n

bysort fid_iron_steel: egen det_sinter_steel = sum(det_sinter)
bysort fid_iron_steel: egen pdt_sinter_steel = sum(pdt_sinter)
bysort fid_iron_steel: egen nb = sum(nb_sinter)
drop nb_sinter
rename nb nb_sinter 

********************************************************************************
replace tfp1_sinter = tfp1_sinter*det_sinter/det_sinter_steel
bysort fid_iron_steel: egen tfp1 = sum(tfp1_sinter)

drop tfp1_sinter 
rename tfp1 tfp1_sinter

*re-adjust sinter quality 
replace sinter_quality1 = sinter_quality1*pdt_sinter/pdt_sinter_steel
bysort fid_iron_steel: egen quality1 = sum(sinter_quality1)

drop sinter_quality1 
rename quality1 sinter_quality1

bysort fid_iron_steel: gen n = _n
keep if n == 1
drop n

keep owner firm firm_sinter_iron_dup fid_sinter_iron fid_iron_steel det_sinter_steel pdt_sinter_steel det_iron pdt_iron tfp1* sinter_quality* iron_quality1 nb* 

rename firm firm_add
rename owner owner_add

save sinter_iron_tfp_lmk_link_aggregate_boot.dta, replace

/**Merge stage-level dataset with tfp estimates for steel making and then merge with information on iron and sinter*/
use steel_bootstrap.dta, clear

keep firm plant_dup time year month production size owner region second firm_iron_steel_dup

drop if production == . 

gen tid = string(time)
gen fid = plant_dup + tid

merge 1:1 fid using steel_tfp_lmk_link_boot.dta
drop _merge

*trim outliers
gen logtfp = log(tfp_steel) 
drop if logtfp > 2 | logtfp <= -2 

*steel quality outlier 
replace second = 0 if second < 0
replace second = 100 if second > 100


gen fid_iron_steel = firm_iron_steel_dup + tid

/*aggregation across plants within stage according to the facility identifier*/
*create weights
bysort fid_iron_steel: egen det_steel = sum(w_det_steel)
bysort fid_iron_steel: egen pdt_steel = sum(production)
bysort fid_iron_steel: gen nb_steel = _N

gen tfp1_steel = log(tfp_steel) 

replace tfp1_steel = tfp1_steel*w_det_steel/det_steel
bysort fid_iron_steel: egen tfp1 = sum(tfp1_steel) /*tfp by weight 1 using deterministic component*/

drop tfp1_steel 
rename tfp1 tfp1_steel

*weighted quality
replace second = second*production/pdt_steel
bysort fid_iron_steel: egen steel_quality1 = sum(second)

bysort fid_iron_steel: gen n = _n
keep if n == 1
drop n 

merge 1:1 fid_iron_steel using sinter_iron_tfp_lmk_link_aggregate_boot.dta
drop _merge

replace firm = firm_add if firm == ""
replace owner = owner_add if owner == ""

/*construct aggregate tfp using elasticities as weight*/
*method 1: using elasticity
gen gamma3 = .
gen gamme2 = .

append using gamma3.dta

replace gamma3 = gamma3[_N]
drop if _n == _N

append using gamma2.dta

replace gamma2 = gamma2[_N]
drop if _n == _N

gen tfp1 = tfp1_steel + gamma3*tfp1_iron + gamma2*gamma3*tfp1_sinter

save 3stage_tfp_lmk_link_aggregate_boot.dta, replace 

/*start facility-level ownership analysis*/
drop if plant == ""
drop if nb_sinter == 0 | nb_iron == 0 | nb_steel == 0
drop if nb_sinter == . | nb_iron == . | nb_steel == .

encode firm_iron_steel_dup, gen(vinew)
encode owner, gen(ownership) 
gen logsize = log(size_steel) 

xtset vinew time, monthly

/*generate standard errors for the estimates in Table 5 Column (1) and (2)*/
reg tfp1 i.ownership i.time
outreg2 using vi_owner_boot, keep(i.ownership) noaster excel replace 

reg tfp1 i.ownership logsize i.time
outreg2 using vi_owner_size_boot, keep(i.ownership logsize) noaster excel replace

/*generate standard errors for the estimates in Table 8 Column (3) and (4)*/
reg tfp1 i.ownership sinter_quality1 iron_quality1 steel_quality1 i.time
outreg2 using owner_quality_boot, keep(i.ownership) noaster excel replace

reg tfp1 i.ownership logsize sinter_quality1 iron_quality1 steel_quality1 i.time
outreg2 using owner_quality_size_boot, keep(i.ownership logsize) noaster excel replace


local reps = `reps' + 1
}


///Start replication 2 to 1000 and append results in the excel files created in replication 1

forvalues `reps' = 1(1)`nreps'{

********************************
*step 1: draw bootstrap samples*
********************************

//step 1.1: draw bootstrap samples of steel based on facility IDs. 

use steel_plant_pf.dta, clear

/*generate log variables*/
gen y = log(production)
gen l = log(labor)
gen k = log(size)
gen logiron = log(tpigiron/1000)
gen logscrap = log(tscrap/1000) 
gen age = year - birth
gen m = logiron
gen p = logscrap /*use scrap steel as the proxy variable*/

gen dsecond = 0
replace dsecond = 1 if second > 0 & second < = 100

*trim outliers
drop if labor < 1 | labor > 500000

*merge with steel linkage
merge m:1 plant using steel_link.dta,keep(match)
drop _merge

gen firm_iron_steel = firm + steel_link /*create vi id*/

bsample, cluster(firm_iron_steel) idcluster(vi)

bysort plant time: gen identifier = _n /*identify duplicated equipment by bootstrap*/
bysort plant time: gen nb_dup = _N /*identify the number of duplicates used to expand upstream sample*/
gen dup_str = string(identifier)
gen plant_dup = plant + dup_str /*generate new ids for bootstrap sample*/
encode plant_dup, gen(newid)
sort newid time
xtset newid time

save steel_bootstrap.dta, replace 

//step 1.2: pick a sample of blast furnaces matched with steel_link

use steel_bootstrap.dta, clear
bysort firm_iron_steel: gen drop = _n
keep if drop == 1 
keep firm_iron_steel nb_dup  /*collect vi ids that appear in the bootstrap sample; keep the number of duplicates*/
save bootstrap_link_iron_steel.dta, replace

use iron_plant_pf.dta, clear

merge m:1 plant using iron_link.dta /*Add facility identifier*/
drop _merge

gen firm_iron_steel = firm + iron_steel_link /*create vi ids*/

merge m:1 firm_iron_steel using bootstrap_link_iron_steel.dta, keep(match)/*keep the Vis that appear in the bootstrap steel sample*/
drop _merge

drop if labor < 1
/*generate log variables*/
gen y = log(production)
gen l = log(labor)
gen k = log(size)
gen logpure = log((tore/1000)*(quality/100))
gen logenergy = log(tenergy/1000)
gen age = year - birth
gen m = logpure
gen p = logenergy

expand nb_dup /*expand samples to match with duplicated VIs in the bootstrap sample*/
drop nb_dup

bysort plant time: gen identifier = _n
bysort plant time: gen nb_dup = _N 
gen dup_str = string(identifier)
gen plant_dup = plant + dup_str
encode plant_dup, gen(newid)
sort newid time
xtset newid time

save iron_bootstrap.dta, replace 

//step 1.3: *step 1.2: pick a sample of blast furnaces matched with steel_link

use iron_bootstrap.dta, clear
gen firm_sinter_iron = firm + sinter_iron_link
bysort firm_sinter_iron: egen max_dup = max(nb_dup)/*for structure where a set of sintering machines may map into multiple facilities, and only part of the matched facilities get expanded, we still expand the sintering machines*/
bysort firm_sinter_iron: gen drop = _n
keep if drop == 1
keep firm_sinter_iron max_dup
save bootstrap_link_sinter_iron.dta, replace

use sinter_plant_pf.dta, clear

merge m:1 plant using sinter_link.dta /*Add facility identifier*/
drop _merge

gen firm_sinter_iron = firm + sinter_link
merge m:1 firm_sinter_iron using bootstrap_link_sinter_iron.dta, keep(match)/*match with iron sample id*/
drop _merge

/*generate log variables*/
gen y = log(production*grade/100) 
gen l = log(labor)
gen k = log(size)
gen age = year - birth
gen logenergy = log(tenergy/1000)
gen p = logenergy /*proxy variable*/

/*drop outlier*/
gen p1 = log(energy)
drop if p1 < 0 | p1 > 6.9 
drop if labor > 10000 | labor < 1 
drop if grade > 100 
drop if firm == "首钢" & (size == 65 | size == 200)

expand max_dup /*expand sintering sample to match with iron sample*/
drop max_dup

bysort plant time: gen identifier = _n
bysort plant time: gen nb_dup = _N 
gen dup_str = string(identifier)
gen plant_dup = plant + dup_str
encode plant_dup, gen(newid)
sort newid time
xtset newid time

save sinter_bootstrap.dta, replace 

*******************************
*step 2: estimate steel making*
*******************************
local dlwcprogram = "acf_mata_seel.do"

do `dlwcprogram'

use steel_bootstrap.dta, clear

*---------------creating variables---------------------------------------------*
* higher order terms on inputs k l a p

forvalues ll = 0(1)5{
         local kmax = 5 - `ll'
		 forvalues kk = 0(1)`kmax'{
		      local amax = 5 - `ll' - `kk'
			  forvalues aa = 0(1)`amax'{
			       local mmax = 5 - `ll' - `kk' - `aa'
				   forvalues mm = 0(1)`mmax'{
				    local pmax = 5 - `ll' - `kk' - `mm' - `aa'
					forvalues pp = 0(1)`pmax'{
				    gen poly`ll'`kk'`mm'`aa'`pp' = (l^`ll')*(k^`kk')*(m^`mm')*(age^`aa')*(p^`pp')
				     }
				   }
			  }
		 }
}


*------FIRST STAGE USING EXP AS INPUT  ----------------------------------------*
local regphi = "poly* dsecond second i.t i.owner i.region"

xi: reg y `regphi'
predict phi
predict epsilon, res
label var phi "phi_it 
label var epsilon "measurement error first stage

*------------------------------------------------------------------------------*
xtset newid time, monthly
gen phi_lag=L.phi
gen l_lag=L.l
gen k_lag=L.k
gen age_lag = L.age
gen m_lag = L.m
gen dsecond_lag = L.dsecond
gen second_lag = L.second

sort newid t
gen const=1
drop if y==.
drop if l_lag==.
drop if l == .
drop if k==.
drop if m ==.
drop if m_lag == .
drop if phi==.
drop if phi_lag==.
drop if age ==.

*-------------------------------start search-----------------------------------*
**use the gmm estimates as initial values 
gen initiala = -0.000564216
gen initialk = 0.129031925
gen initialz = sqrt(initialk/(1-initialk))
gen initiall = 0.052450862
gen initialx = sqrt(initiall/(1-initiall))
gen initialm = 0.877800415

dlwc

matrix result = (beta_dlw[1,1]^2/(1+beta_dlw[1,1]^2),beta_dlw[1,2]^2/(1+beta_dlw[1,2]^2),beta_dlw[1,3],beta_dlw[1,4])


putexcel A`reps' = mat(result) using acf_steel_bootstrap, sheet("iron_scrap") modify

matrix drop result

*--------------------------------steel making analysis-------------------------*
import excel "...folder name\acf_steel_bootstrap.xlsx", sheet("iron_scrap") clear
gen obs = _n
keep if obs == `reps'
rename A beta_l
rename B beta_k
rename C beta_m
rename D beta_a
save beta_steel_bootstrap.dta, replace

/*generate material elasticity*/
use beta_steel_bootstrap.dta, clear
keep beta_m 
rename beta_m gamma3

save gamma3.dta, replace 

/*generate steel TFP*/
use steel_bootstrap.dta, clear

gen beta_l = .
gen beta_k = .
gen beta_m = .
gen beta_a = .

append using beta_steel_bootstrap.dta

replace beta_l = beta_l[_N]
replace beta_k = beta_k[_N]
replace beta_m = beta_m[_N]
replace beta_a = beta_a[_N]
drop if _n == _N

/*generate tfp estimates*/
gen tfp = y - beta_l*l - beta_k*k - beta_m*m - beta_a*age

*normalized tfp 
egen mean_tfp = mean(tfp)
replace tfp = exp(tfp - mean_tfp)
gen w_det_steel = exp(beta_l*l + beta_k*k + beta_m*m + beta_a*age)

keep newid plant_dup firm time year tfp w_det_steel 
rename tfp tfp_steel

gen t = string(time)
gen fid = plant_dup + t
drop t 

save steel_tfp_lmk_link_boot.dta, replace 

/*generate controls for upstream*/
use steel_bootstrap.dta, clear

gen beta_l = .
gen beta_k = .
gen beta_m = .
gen beta_a = .

append using beta_steel_bootstrap.dta

replace beta_l = beta_l[_N]
replace beta_k = beta_k[_N]
replace beta_m = beta_m[_N]
replace beta_a = beta_a[_N]
drop if _n == _N

/*generate tfp estimates*/
gen tfp = y - beta_l*l - beta_k*k - beta_m*m - beta_a*age

*normalized tfp 
egen mean_tfp = mean(tfp)
replace tfp = tfp - mean_tfp
gen w_det_steel = exp(beta_l*l + beta_k*k + beta_m*m + beta_a*age)

drop t
gen t = string(time)

gen fid_iron_steel = firm + t + steel_link /*stack information at firm_linkage_time level, that is at the production chain level; to make sure that every firm every link of each period only has one observation*/

keep if identifier == 1

/*create weighted information*/
bysort fid_iron_steel: egen det_steel = sum(w_det_steel)
bysort fid_iron_steel: gen nb_steel = _N 

drop if tfp == . | k == . | l == .

replace tfp = tfp*w_det_steel/det_steel
bysort fid_iron_steel: egen tfp_steel = sum(tfp) /*logtfp weighted by deterministic components*/
replace k = k*w_det_steel/det_steel
bysort fid_iron_steel: egen k_steel = sum(k)
replace l = l*w_det_steel/det_steel
bysort fid_iron_steel: egen l_steel = sum(l)

bysort fid_iron_steel: gen structure = _n /*keep one observation for one chain at each time t*/
keep if structure == 1 

keep fid_iron_steel tfp_steel l_steel k_steel nb_steel 

save steel_tfp_lmk_iron_bootstrap.dta, replace

/*TFP analysis: steel making*/
use steel_bootstrap.dta, clear

keep newid plant_dup firm time production number size owner region second labor 

gen t = string(time)
gen fid = plant_dup + t
drop t 

merge 1:1 fid using steel_tfp_lmk_link_boot.dta
drop _merge

gen logtfp = log(tfp_steel)

drop if logtfp > 2 | logtfp <= -2 

gen logsize = log(size)
encode owner, gen(ownership)

/*generate standard errors for the estimates in Table 5 Column (7) and (8)*/
reg logtfp i.ownership i.time
outreg2 using steel_owner_boot, keep(i.ownership) noaster excel append  

reg logtfp i.ownership logsize i.time
outreg2 using steel_owner_size_boot, keep(i.ownership logsize) noaster excel append 

/*generate standard errors for the estimates in Table 8 Column (7) and (8)*/
replace second = 100 if second > 100

reg logtfp i.ownership second i.time
outreg2 using steel_quality_boot, keep(i.ownership second) noaster excel append 

reg logtfp i.ownership logsize second i.time
outreg2 using steel_quality_size_boot, keep(i.ownership logsize second) noaster excel append

**********************************
*step 3: estimate pig iron making*
**********************************
local dlwcprogram = "acf_mata_iron.do"

do `dlwcprogram'

use iron_bootstrap.dta, clear

gen tid = string(time)
gen fid_iron_steel = firm + tid + iron_steel_link

*merge with downstream steel information 
merge m:1 fid_iron_steel using steel_tfp_lmk_iron_bootstrap.dta 

*---------------creating variables---------------------------------------------*
* higher order terms on inputs k l a p

forvalues ll = 0(1)5{
         local kmax = 5 - `ll'
		 forvalues kk = 0(1)`kmax'{
		      local amax = 5 - `ll' - `kk'
			  forvalues aa = 0(1)`amax'{
			       local mmax = 5 - `ll' - `kk' - `aa'
				   forvalues mm = 0(1)`mmax'{
				    local pmax = 5 - `ll' - `kk' - `mm' - `aa'
					forvalues pp = 0(1)`pmax'{
				    gen poly`ll'`kk'`mm'`aa'`pp' = (l^`ll')*(k^`kk')*(m^`mm')*(age^`aa')*(p^`pp')
				     }
				   }
			  }
		 }
}

*------FIRST STAGE USING EXP AS INPUT  ----------------------------------------*
local regphi = "poly* i.t i.owner i.region"

xi: reg y `regphi' tfp_steel l_steel k_steel nb_steel 
predict phi
predict epsilon, res
label var phi "phi_it 
label var epsilon "measurement error first stage

*------------------------------------------------------------------------------*/
xtset newid time, monthly
gen phi_lag=L.phi
gen l_lag=L.l
gen k_lag=L.k
gen age_lag = L.age
gen m_lag = L.m

sort newid t
gen const=1
drop if y==.
drop if l_lag==.
drop if l == .
drop if k==.
drop if m ==.
drop if m_lag == .
drop if phi==.
drop if phi_lag==.
drop if age ==.

*-------------------------------start search-----------------------------------*
**use the gmm estimates as initial values
gen initiala = -0.000288024
gen initialk = 0.448629832
gen initialz = sqrt(initialk/(1-initialk))
gen initiall = 0.182428734
gen initialx = sqrt(initiall/(1-initiall))
gen initialm = 0.436671586

dlwc

matrix result = (beta_dlw[1,1]^2/(1+beta_dlw[1,1]^2),beta_dlw[1,2]^2/(1+beta_dlw[1,2]^2),beta_dlw[1,3],beta_dlw[1,4])


putexcel A`reps' = mat(result) using acf_iron_bootstrap, sheet("energy") modify

matrix drop result

*---------------------pig-iron making analysis---------------------------------*
import excel "...folder name\acf_iron_bootstrap.xlsx", sheet("energy") clear
gen obs = _n
keep if obs == `reps'
rename A beta_l
rename B beta_k
rename C beta_m
rename D beta_a
save beta_iron_bootstrap.dta, replace

/*generate material elasticity*/
use beta_iron_bootstrap.dta, clear
keep beta_m 
rename beta_m gamma2

save gamma2.dta, replace 

/*generate TFP estimates*/
use iron_bootstrap.dta, clear

gen beta_l = .
gen beta_k = .
gen beta_m = .
gen beta_a = .

append using beta_iron_bootstrap.dta

replace beta_l = beta_l[_N]
replace beta_k = beta_k[_N]
replace beta_m = beta_m[_N]
replace beta_a = beta_a[_N]
drop if _n == _N

/*generate tfp estimates*/
gen tfp = y - beta_l*l - beta_k*k - beta_m*m - beta_a*age
drop if labor < 1
egen mean_tfp = mean(tfp)
replace tfp = exp(tfp - mean_tfp)

/*generate deterministic part*/
gen w_det_iron = exp(beta_l*l + beta_k*k + beta_m*m + beta_a*age)

keep newid plant_dup firm time tfp w_det_iron
rename tfp tfp_iron

gen t = string(time)
gen fid = plant_dup + t
drop t

save iron_tfp_lmk_link_boot.dta, replace

/*generate controls for upstream stage sintering using linkage link_sinter_iron*/
use iron_bootstrap.dta, clear

gen tid = string(time)
gen fid_iron_steel = firm + tid + iron_steel_link

**merge with downstream steel information 
merge m:1 fid_iron_steel using steel_tfp_lmk_iron_bootstrap.dta 
drop _merge

gen beta_l = .
gen beta_k = .
gen beta_m = .
gen beta_a = .

append using beta_iron_bootstrap.dta

replace beta_l = beta_l[_N]
replace beta_k = beta_k[_N]
replace beta_m = beta_m[_N]
replace beta_a = beta_a[_N]
drop if _n == _N

/*generate tfp estimates*/
gen tfp = y - beta_l*l - beta_k*k - beta_m*m - beta_a*age
drop if labor < 1
egen mean_tfp = mean(tfp)
replace tfp = tfp - mean_tfp/*keep logtfp here*/

/*generate deterministic part*/
gen w_det_iron = exp(beta_l*l + beta_k*k + beta_m*m + beta_a*age)

//generate controls for sintering 
gen fid_sinter_iron = firm + tid + sinter_iron_link

keep if identifier == 1 /*drop duplicated information otherwise would overcount*/
/*create weighted information*/
bysort fid_sinter_iron: egen det_iron = sum(w_det_iron)
bysort fid_sinter_iron: gen nb_iron = _N

drop if tfp == . | k == . | l == . | tfp_steel == . | k_steel == . | l_steel == . | nb_steel == . 

foreach var of varlist tfp k l tfp_steel k_steel l_steel{
replace `var' =`var'*w_det_iron/det_iron
bysort fid_sinter_iron: egen `var'_iron = sum(`var') /*logtfp weighted by deterministic components*/
}

bysort fid_sinter_iron: egen nb_steel_iron = sum(nb_steel) /*number of equipment don't need weight, simply the total number to reflect the structure*/

bysort fid_sinter_iron: gen structure = _n
keep if structure == 1 

keep fid_sinter_iron tfp_iron l_iron k_iron tfp_steel_iron l_steel_iron k_steel_iron nb_iron nb_steel_iron 

save iron_tfp_lmk_sinter_bootstrap.dta, replace

///TFP analysis: iron making 
use iron_bootstrap.dta, clear

keep newid plant_dup firm time year month production number size owner region labor grade1 

gen t = string(time)
gen fid = plant + t
drop t 

merge 1:1 fid using iron_tfp_lmk_link_boot.dta
drop _merge

gen logtfp = log(tfp_iron)

drop if logtfp > 4 | logtfp < -4 

gen logsize = log(size)
encode owner, gen(ownership)

/*generate standard errors for the estimates in Table 5 Column (5) and (6)*/
reg logtfp i.ownership i.time
outreg2 using iron_owner_boot, keep(i.ownership) noaster excel append 

reg logtfp i.ownership logsize i.time
outreg2 using iron_owner_size_boot, keep(i.ownership logsize) noaster excel append  

/*generate estimates in Table 8 Column (5) and (6)*/
replace grade1 = 100 if grade1 > 100

reg logtfp i.ownership grade1 i.time
outreg2 using iron_quality_boot, keep(i.ownership grade1) noaster excel append

reg logtfp i.ownership logsize grade1 i.time
outreg2 using iron_quality_size_boot, keep(i.ownership logsize grade1) noaster excel append

****************************
*step 4: estimate sintering* 
****************************

local dlwcprogram = "acf_mata_sinter.do"

do `dlwcprogram'

use sinter_bootstrap.dta, clear

gen tid = string(time)
gen fid_sinter_iron = firm + tid + sinter_link

**merge with downstream steel information 
merge m:1 fid_sinter_iron using iron_tfp_lmk_sinter_bootstrap.dta 

*---------------creating variables---------------------------------------------*
* higher order terms on inputs k l a p

forvalues ll = 0(1)4{
         local kmax = 4 - `ll'
		 forvalues kk = 0(1)`kmax'{
		      local amax = 4 - `ll' - `kk'
			  forvalues aa = 0(1)`amax'{
			       local pmax = 4 - `ll' - `kk' - `aa'
				   forvalues pp = 0(1)`pmax'{
				   gen poly`ll'`kk'`aa'`pp' = (l^`ll')*(k^`kk')*(age^`aa')*(p^`pp')
				   }
			  }
		 }
}


*------FIRST STAGE USING EXP AS INPUT  ----------------------------------------*
local regphi = "poly* i.t i.owner i.region"

xi: reg y `regphi' tfp_steel_iron tfp_iron l_steel_iron l_iron k_steel_iron k_iron nb_iron nb_steel_iron 
predict phi
predict epsilon, res
label var phi "phi_it 
label var epsilon "measurement error first stage

*------------------------------------------------------------------------------*
xtset newid time, monthly
gen phi_lag=L.phi
gen l_lag=L.l
gen k_lag=L.k
gen age_lag = L.age

sort newid t
gen const=1
drop if y==.
drop if l_lag==.
drop if l == .
drop if k==.

drop if phi==.
drop if phi_lag==.
drop if age ==.


*-------------------------------start search-----------------------------------*
**use the gmm estimates as initial values
gen initiala = -0.000515514
gen initialk = 0.877699269
gen initialz = sqrt(initialk/(1-initialk))
gen initiall = 0.187965831
gen initialx = sqrt(initiall/(1-initiall))

dlwc

matrix result = (beta_dlw[1,1]^2/(1+beta_dlw[1,1]^2),beta_dlw[1,2]^2/(1+beta_dlw[1,2]^2),beta_dlw[1,3])


putexcel A`reps' = mat(result) using acf_sinter_bootstrap, sheet("y2_energy") modify

matrix drop result

/*********************************sintering analysis****************************/
import excel "...folder name\acf_sinter_link.xlsx", sheet("y2_energy") clear
sort D
keep if _n == 1
rename A beta_l
rename B beta_k
rename C beta_a
drop D
save beta_sinter_bootstrap.dta, replace

/*generate tfp estimates*/
use sinter_bootstrap.dta, clear

gen beta_l = .
gen beta_k = .
gen beta_a = .

append using beta_sinter_bootstrap.dta

replace beta_l = beta_l[_N]
replace beta_k = beta_k[_N]
replace beta_a = beta_a[_N]
drop if _n == _N

/*generate tfp estimates*/
gen tfp = y - beta_l*l - beta_k*k - beta_a*age

egen mean_tfp = mean(tfp)
replace tfp = exp(tfp - mean_tfp)

/*generate deterministic part*/
gen w_det_sinter = exp(beta_l*l + beta_k*k + beta_a*age)

keep plant_dup firm time tfp w_det_sinter
rename tfp tfp_sinter

gen t = string(time)
gen fid = plant_dup + t
drop t

save sinter_tfp_lmk_link_boot.dta, replace

/*Start ownership analysis*/
use sinter_bootstrap.dta, clear

gen t = string(time)
gen fid = plant_dup + t
drop t 

merge 1:1 fid using sinter_tfp_lmk_link_boot.dta
drop _merge

gen logtfp = log(tfp_sinter)

drop if logtfp > 5 

gen logsize = log(size)
encode owner, gen(ownership)

/*generate standard error for the estimates in Table 5 Column (3) and (4) */ 
reg logtfp i.ownership i.time
outreg2 using sinter_owner_boot, keep(i.ownership) noaster excel append 

reg logtfp i.ownership logsize i.time
outreg2 using sinter_owner_size_boot, keep(i.ownership logsize) noaster excel append

/*generate standard errors for the estimates in Table 8 Column (3) and (4)*/
replace grade_stability = 100 if grade_stability>100

reg logtfp i.ownership grade_stability i.time 
outreg2 using sinter_quality_boot, keep(i.ownership grade_stability) noaster excel append

reg logtfp i.ownership logsize grade_stability i.time
outreg2 using sinter_quality_size_boot, keep(i.ownership logsize grade_stability) noaster excel append 


*********************************
*Step 4: Integrate aggregate TFP*
*********************************
/*Merge stage-level dataset with tfp estimates for sintering*/

use sinter_bootstrap.dta, clear

keep firm plant_dup time year month production size owner region labor grade_stability firm_sinter_iron_dup

drop if production == .

gen tid = string(time)
gen fid = plant_dup + tid

merge 1:1 fid using sinter_tfp_lmk_link_boot.dta
drop _merge

*trim outliers
gen logtfp = log(tfp_sinter)
drop if logtfp > 5 

*adjust outliers of quality measure 
replace grade_stability = 0 if grade_stability < 0
replace grade_stability = 100 if grade_stability > 100

gen fid_sinter_iron = firm_sinter_iron_dup + tid

/*aggregation across plants within stage according to the facility identifier*/
*create weights
bysort fid_sinter_iron: egen det_sinter = sum(w_det_sinter)
bysort fid_sinter_iron: egen pdt_sinter = sum(production)
bysort fid_sinter_iron: gen nb_sinter = _N

gen tfp1_sinter = log(tfp_sinter) 

replace tfp1_sinter = tfp1_sinter*w_det_sinter/det_sinter 
bysort fid_sinter_iron: egen tfp1 = sum(tfp1_sinter) /*tfp by weight 1 using deterministic component*/

drop tfp1_sinter 
rename tfp1 tfp1_sinter

*create weighted quality
replace grade_stability = grade_stability*production/pdt_sinter
bysort fid_sinter_iron: egen sinter_quality1 = sum(grade_stability)

bysort fid_sinter_iron: gen n = _n
keep if n == 1
drop n 

keep firm_sinter_iron_dup fid_sinter_iron det_sinter pdt_sinter tfp1_sinter owner region sinter_quality1 nb_sinter
rename region region_sinter

save sinter_tfp_lmk_link_aggregate_boot.dta, replace

/*Merge stage-level dataset with tfp estimates for pig-iron making*/
use iron_bootstrap.dta, clear

keep firm plant_dup time year month production size owner region grade1 firm_sinter_iron_dup firm_iron_steel_dup

drop if production == .

gen tid = string(time)
gen fid = plant_dup + tid

merge 1:1 fid using iron_tfp_lmk_link_boot.dta
drop _merge

*trim outliers
gen logtfp = log(tfp_iron)
drop if logtfp > 4 | logtfp < -4 

*iron quality outlier 
replace grade1 = 0 if grade1 < 0
replace grade1 = 100 if grade1 > 100

gen fid_sinter_iron = firm_sinter_iron + tid
gen fid_iron_steel = firm_iron_steel + tid 

merge m:1 fid_sinter_iron using sinter_tfp_lmk_link_aggregate_boot.dta
drop _merge

/*aggregation across plants within stage according to the facility identifier*/
**create weights
bysort fid_iron_steel: egen det_iron = sum(w_det_iron)
bysort fid_iron_steel: egen pdt_iron = sum(production)
bysort fid_iron_steel: gen nb_iron = _N

gen tfp1_iron = log(tfp_iron) 
replace tfp1_iron = tfp1_iron*w_det_iron/det_iron 
bysort fid_iron_steel: egen tfp1 = sum(tfp1_iron) /*tfp by weight 1 using deterministic component*/

drop tfp1_iron
rename tfp1 tfp1_iron

*weighted quality
replace grade1 = grade1*production/pdt_iron
bysort fid_iron_steel: egen iron_quality1 = sum(grade1)

*Ajust the weight for sintering plants according to according to fid_iron_steel
bysort fid_iron_steel fid_sinter_iron: gen n = _N
replace det_sinter = det_sinter/n
replace pdt_sinter = pdt_sinter/n
replace size_sinter = size_sinter/n
replace nb_sinter = nb_sinter/n
drop n

bysort fid_iron_steel: egen det_sinter_steel = sum(det_sinter)
bysort fid_iron_steel: egen pdt_sinter_steel = sum(pdt_sinter)
bysort fid_iron_steel: egen nb = sum(nb_sinter)
drop nb_sinter
rename nb nb_sinter 

********************************************************************************
replace tfp1_sinter = tfp1_sinter*det_sinter/det_sinter_steel
bysort fid_iron_steel: egen tfp1 = sum(tfp1_sinter)

drop tfp1_sinter 
rename tfp1 tfp1_sinter

*re-adjust sinter quality 
replace sinter_quality1 = sinter_quality1*pdt_sinter/pdt_sinter_steel
bysort fid_iron_steel: egen quality1 = sum(sinter_quality1)

drop sinter_quality1 
rename quality1 sinter_quality1

bysort fid_iron_steel: gen n = _n
keep if n == 1
drop n

keep owner firm firm_sinter_iron_dup fid_sinter_iron fid_iron_steel det_sinter_steel pdt_sinter_steel det_iron pdt_iron tfp1* sinter_quality* iron_quality1 nb* 

rename firm firm_add
rename owner owner_add

save sinter_iron_tfp_lmk_link_aggregate_boot.dta, replace

/**Merge stage-level dataset with tfp estimates for steel making and then merge with information on iron and sinter*/
use steel_bootstrap.dta, clear

keep firm plant_dup time year month production size owner region second firm_iron_steel_dup

drop if production == . 

gen tid = string(time)
gen fid = plant_dup + tid

merge 1:1 fid using steel_tfp_lmk_link_boot.dta
drop _merge

*trim outliers
gen logtfp = log(tfp_steel) 
drop if logtfp > 2 | logtfp <= -2 

*steel quality outlier 
replace second = 0 if second < 0
replace second = 100 if second > 100


gen fid_iron_steel = firm_iron_steel_dup + tid

/*aggregation across plants within stage according to the facility identifier*/
*create weights
bysort fid_iron_steel: egen det_steel = sum(w_det_steel)
bysort fid_iron_steel: egen pdt_steel = sum(production)
bysort fid_iron_steel: gen nb_steel = _N

gen tfp1_steel = log(tfp_steel) 

replace tfp1_steel = tfp1_steel*w_det_steel/det_steel
bysort fid_iron_steel: egen tfp1 = sum(tfp1_steel) /*tfp by weight 1 using deterministic component*/

drop tfp1_steel 
rename tfp1 tfp1_steel

*weighted quality
replace second = second*production/pdt_steel
bysort fid_iron_steel: egen steel_quality1 = sum(second)

bysort fid_iron_steel: gen n = _n
keep if n == 1
drop n 

merge 1:1 fid_iron_steel using sinter_iron_tfp_lmk_link_aggregate_boot.dta
drop _merge

replace firm = firm_add if firm == ""
replace owner = owner_add if owner == ""

/*construct aggregate tfp using elasticities as weight*/
*method 1: using elasticity
gen gamma3 = .
gen gamme2 = .

append using gamma3.dta

replace gamma3 = gamma3[_N]
drop if _n == _N

append using gamma2.dta

replace gamma2 = gamma2[_N]
drop if _n == _N

gen tfp1 = tfp1_steel + gamma3*tfp1_iron + gamma2*gamma3*tfp1_sinter

save 3stage_tfp_lmk_link_aggregate_boot.dta, replace 

/*start facility-level ownership analysis*/
drop if plant == ""
drop if nb_sinter == 0 | nb_iron == 0 | nb_steel == 0
drop if nb_sinter == . | nb_iron == . | nb_steel == .

encode firm_iron_steel_dup, gen(vinew)
encode owner, gen(ownership) 
gen logsize = log(size_steel) 

xtset vinew time, monthly

/*generate standard errors for the estimates in Table 5 Column (1) and (2)*/
reg tfp1 i.ownership i.time
outreg2 using vi_owner_boot, keep(i.ownership) noaster excel append

reg tfp1 i.ownership logsize i.time
outreg2 using vi_owner_size_boot, keep(i.ownership logsize) noaster excel append

/*generate standard errors for the estimates in Table 8 Column (3) and (4)*/
reg tfp1 i.ownership sinter_quality1 iron_quality1 steel_quality1 i.time
outreg2 using owner_quality_boot, keep(i.ownership) noaster excel append

reg tfp1 i.ownership logsize sinter_quality1 iron_quality1 steel_quality1 i.time
outreg2 using owner_quality_size_boot, keep(i.ownership logsize) noaster excel append


local reps = `reps' + 1
}


